{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "labeled = pd.read_csv(\"random100000labeled.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 100000 entries, 0 to 99999\n",
      "Data columns (total 5 columns):\n",
      "oldindex               100000 non-null int64\n",
      "title                  100000 non-null object\n",
      "total_shares           100000 non-null int64\n",
      "url                    100000 non-null object\n",
      "isupliftinganecdote    8999 non-null float64\n",
      "dtypes: float64(1), int64(2), object(2)\n",
      "memory usage: 3.8+ MB\n"
     ]
    }
   ],
   "source": [
    "# info() prints its report itself and returns None, so it is called bare;\n",
    "# wrapping it in print() adds a stray 'None' line to the output.\n",
    "labeled.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1022\n"
     ]
    }
   ],
   "source": [
    "# Count rows labeled 1.0 over the whole column rather than a hard-coded\n",
    "# row range; unlabeled (NaN) rows compare False and are not counted.\n",
    "totaluas = int((labeled['isupliftinganecdote'] == 1.0).sum())\n",
    "print(totaluas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows labeled 1.0. reset_index() renumbers them from 0 and\n",
    "# keeps the previous row labels in a new 'index' column.\n",
    "upliftinganecdotes = (\n",
    "    labeled[labeled['isupliftinganecdote'].eq(1.0)]\n",
    "    .reset_index()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   index  oldindex                                              title  \\\n",
      "0      0    124419  These 6 women got written out of tech history....   \n",
      "1      2    102639  Georgia Democrat aims to be nation’s first fem...   \n",
      "2      3     21523  This Black Cosplayer Is Breaking The Racial Bo...   \n",
      "3      9     22521  Greensboro Teen Accepted To 113 Colleges, Awar...   \n",
      "4     15     44302  She Stepped On Stage To Sing About Angels, Whe...   \n",
      "\n",
      "   total_shares                                                url  \\\n",
      "0         73821  https://www.upworthy.com/these-6-women-got-wri...   \n",
      "1         85356  https://www.washingtonpost.com/national/georgi...   \n",
      "2        256956  https://www.boredpanda.com/kiera-please-multi-...   \n",
      "3        249447  https://www.wfmynews2.com/article/news/local/g...   \n",
      "4        158026  http://www.qpolitical.com/1-she-stepped-on-sta...   \n",
      "\n",
      "   isupliftinganecdote  \n",
      "0                  1.0  \n",
      "1                  1.0  \n",
      "2                  1.0  \n",
      "3                  1.0  \n",
      "4                  1.0  \n"
     ]
    }
   ],
   "source": [
    "print(upliftinganecdotes.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the four export columns; the helper 'index' column and\n",
    "# total_shares are left out.\n",
    "upliftinganecdotes = upliftinganecdotes.loc[:, ['oldindex', 'title', 'url','isupliftinganecdote']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   oldindex                                              title  \\\n",
      "0    124419  These 6 women got written out of tech history....   \n",
      "1    102639  Georgia Democrat aims to be nation’s first fem...   \n",
      "2     21523  This Black Cosplayer Is Breaking The Racial Bo...   \n",
      "3     22521  Greensboro Teen Accepted To 113 Colleges, Awar...   \n",
      "4     44302  She Stepped On Stage To Sing About Angels, Whe...   \n",
      "\n",
      "                                                 url  isupliftinganecdote  \n",
      "0  https://www.upworthy.com/these-6-women-got-wri...                  1.0  \n",
      "1  https://www.washingtonpost.com/national/georgi...                  1.0  \n",
      "2  https://www.boredpanda.com/kiera-please-multi-...                  1.0  \n",
      "3  https://www.wfmynews2.com/article/news/local/g...                  1.0  \n",
      "4  http://www.qpolitical.com/1-she-stepped-on-sta...                  1.0  \n"
     ]
    }
   ],
   "source": [
    "print(upliftinganecdotes.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write under the current user's home directory instead of one user's\n",
    "# hard-coded absolute path; str() keeps docname a plain string.\n",
    "docname = str(Path.home() / \"1022upliftinganecdotes.csv\")\n",
    "upliftinganecdotes.to_csv(docname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows labeled 0.0. reset_index() renumbers them from 0 and\n",
    "# keeps the previous row labels in a new 'index' column.\n",
    "nonupliftinganecdotes = (\n",
    "    labeled[labeled['isupliftinganecdote'].eq(0.0)]\n",
    "    .reset_index()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   index  oldindex                                              title  \\\n",
      "0      1        12      Loyal Employees are your Most Valuable Asset!   \n",
      "1      4     22324            52 Week Money Saving Challenge For 2015   \n",
      "2      5    170884  Oklahoma Wesleyan University President Dr. Eve...   \n",
      "3      6     70817       16 Signs You Grew Up In An Italian Household   \n",
      "4      7     23287  Husband Asks Why His Wife Was Fired From A Com...   \n",
      "\n",
      "   total_shares                                                url  \\\n",
      "0       7374965  https://www.linkedin.com/pulse/loyal-employees...   \n",
      "1        250743  http://diycozyhome.com/52-week-money-saving-ch...   \n",
      "2         57886  https://www.nbcnews.com/news/us-news/oklahoma-...   \n",
      "3        112837  http://www.hardcoreitalians.com/news/16-signs-...   \n",
      "4        244210  https://www.boredpanda.com/brads-wife-fired-cr...   \n",
      "\n",
      "   isupliftinganecdote  \n",
      "0                  0.0  \n",
      "1                  0.0  \n",
      "2                  0.0  \n",
      "3                  0.0  \n",
      "4                  0.0  \n"
     ]
    }
   ],
   "source": [
    "print(nonupliftinganecdotes.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Balance the classes: draw as many negatives as there are positives.\n",
    "# Sampling from `labeled` (not from the previous value of this variable)\n",
    "# keeps the cell idempotent, and the fixed seed makes the draw repeatable.\n",
    "nonupliftinganecdotes = (\n",
    "    labeled.loc[labeled['isupliftinganecdote'] == 0.0]\n",
    "    .sample(n=len(upliftinganecdotes), random_state=42)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "nonupliftinganecdotes = nonupliftinganecdotes[['oldindex', 'title', 'url','isupliftinganecdote']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   oldindex                                              title  \\\n",
      "0    144759  31,000 Toys 'R' Us employees: No job and no se...   \n",
      "1     38467  FBI refused White House request to knock down ...   \n",
      "2     47173  Oregon man's petition to strip Congress of the...   \n",
      "3     80002  Having Pets Instead Of Kids Should Be Consider...   \n",
      "4    175403  Is This South African Mongoose Playing Dead or...   \n",
      "\n",
      "                                                 url  isupliftinganecdote  \n",
      "0  https://money.cnn.com/2018/03/16/news/companie...                  0.0  \n",
      "1  https://www.cnn.com/2017/02/23/politics/fbi-re...                  0.0  \n",
      "2  https://www.dailykos.com/story/2017/3/15/16436...                  0.0  \n",
      "3  https://thefederalist.com/2017/05/09/pets-inst...                  0.0  \n",
      "4  https://news.nationalgeographic.com/2018/04/an...                  0.0  \n"
     ]
    }
   ],
   "source": [
    "print(nonupliftinganecdotes.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1022 entries, 0 to 1021\n",
      "Data columns (total 4 columns):\n",
      "oldindex               1022 non-null int64\n",
      "title                  1022 non-null object\n",
      "url                    1022 non-null object\n",
      "isupliftinganecdote    1022 non-null float64\n",
      "dtypes: float64(1), int64(1), object(2)\n",
      "memory usage: 32.0+ KB\n"
     ]
    }
   ],
   "source": [
    "# info() prints its report itself and returns None, so it is called bare;\n",
    "# wrapping it in print() adds a stray 'None' line to the output.\n",
    "nonupliftinganecdotes.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write under the current user's home directory instead of one user's\n",
    "# hard-coded absolute path; str() keeps docname a plain string.\n",
    "docname = str(Path.home() / \"1022nonupliftinganecdotes.csv\")\n",
    "nonupliftinganecdotes.to_csv(docname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the positives on top of the sampled negatives. Each input keeps its\n",
    "# own row labels, so labels from the two frames overlap in the result.\n",
    "labeleddata = pd.concat(objs=[upliftinganecdotes, nonupliftinganecdotes], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 2044 entries, 0 to 1021\n",
      "Data columns (total 4 columns):\n",
      "oldindex               2044 non-null int64\n",
      "title                  2044 non-null object\n",
      "url                    2044 non-null object\n",
      "isupliftinganecdote    2044 non-null float64\n",
      "dtypes: float64(1), int64(1), object(2)\n",
      "memory usage: 79.8+ KB\n"
     ]
    }
   ],
   "source": [
    "# info() prints its report itself and returns None, so it is called bare;\n",
    "# wrapping it in print() adds a stray 'None' line to the output.\n",
    "labeleddata.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the model inputs (headline text and label) and renumber the\n",
    "# rows from 0; drop=True discards the old row labels instead of adding\n",
    "# them as an extra column that would then have to be removed again.\n",
    "labeleddataformodel = (\n",
    "    labeleddata[['title', 'isupliftinganecdote']]\n",
    "    .reset_index(drop=True)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                               title  isupliftinganecdote\n",
      "0  These 6 women got written out of tech history....                  1.0\n",
      "1  Georgia Democrat aims to be nation’s first fem...                  1.0\n",
      "2  This Black Cosplayer Is Breaking The Racial Bo...                  1.0\n",
      "3  Greensboro Teen Accepted To 113 Colleges, Awar...                  1.0\n",
      "4  She Stepped On Stage To Sing About Angels, Whe...                  1.0\n"
     ]
    }
   ],
   "source": [
    "print(labeleddataformodel.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write under the current user's home directory instead of one user's\n",
    "# hard-coded absolute path; str() keeps docname a plain string.\n",
    "docname = str(Path.home() / \"labeleddataformodel.csv\")\n",
    "labeleddataformodel.to_csv(docname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:root] *",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
